{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " 驱动器 C 中的卷是 OS\n",
      " 卷的序列号是 AE6F-00B3\n",
      "\n",
      " C:\\Users\\27420\\PycharmProjects\\训练\\gitee\\future-teddy\\teddy-B\\code\\data 的目录\n",
      "\n",
      "2021/11/14  10:44    <DIR>          .\n",
      "2021/11/14  10:44    <DIR>          ..\n",
      "2021/11/14  10:11           174,660 result1_1.xlsx\n",
      "2021/11/14  10:19           184,712 result1_2.xlsx\n",
      "2021/11/14  09:32           410,222 result2_1.xlsx\n",
      "2021/11/14  09:46            93,338 result2_2.xlsx\n",
      "2021/11/14  10:45            82,322 result3_2.xlsx\n",
      "2021/11/02  20:09           196,721 附件1.xlsx\n",
      "2021/11/02  20:09           575,770 附件2.xlsx\n",
      "2021/11/02  20:10            40,230 附件3.xlsx\n",
      "2021/11/02  20:11            22,801 附件4.xlsx\n",
      "               9 个文件      1,780,776 字节\n",
      "               2 个目录 111,132,004,352 可用字节\n"
     ]
    }
   ],
   "source": [
    "# List the contents of the data folder through IPython's ls alias.\n",
    "%ls data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "#导包\n",
    "\n",
    "import array\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import datetime\n",
    "import jieba\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "# Plot configuration: use the SimHei sans-serif font (for the Chinese labels) and\n",
    "# turn off the Unicode minus sign for negative tick labels.\n",
    "matplotlib.rcParams.update({\n",
    "    'font.sans-serif': ['SimHei'],\n",
    "    'font.family': 'sans-serif',\n",
    "    'axes.unicode_minus': False,\n",
    "})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 任务 3.1 从文件“result2_1.xlsx”中提取发证日期中的年份，分析比较复混肥料中各组别不同年份产品登记数量的变化趋势。请在报告中给出处理思路及分析过程，使用合适的图表对结果进行可视化。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load result2_1.xlsx, the input workbook named in task 3.1.\n",
    "data = pd.read_excel(io='data/result2_1.xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>序号</th>\n",
       "      <th>企业名称</th>\n",
       "      <th>产品通用名称</th>\n",
       "      <th>产品形态</th>\n",
       "      <th>总氮百分比</th>\n",
       "      <th>P2O5百分比</th>\n",
       "      <th>K2O百分比</th>\n",
       "      <th>含氯情况</th>\n",
       "      <th>有机质百分比</th>\n",
       "      <th>正式登记证号</th>\n",
       "      <th>发证日期</th>\n",
       "      <th>有效期</th>\n",
       "      <th>产品商品名称</th>\n",
       "      <th>适用作物</th>\n",
       "      <th>总无机养分百分比</th>\n",
       "      <th>标签</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>嘉施利（应城）化肥有限公司</td>\n",
       "      <td>复混肥料</td>\n",
       "      <td>粒状</td>\n",
       "      <td>0.17</td>\n",
       "      <td>0.17</td>\n",
       "      <td>0.07</td>\n",
       "      <td>低氯</td>\n",
       "      <td>0.0</td>\n",
       "      <td>鄂农肥（2009）准字0004号</td>\n",
       "      <td>2014-08-15</td>\n",
       "      <td>2019-08</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.41</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>嘉施利（应城）化肥有限公司</td>\n",
       "      <td>复混肥料</td>\n",
       "      <td>粒状</td>\n",
       "      <td>0.20</td>\n",
       "      <td>0.05</td>\n",
       "      <td>0.15</td>\n",
       "      <td>无氯</td>\n",
       "      <td>0.0</td>\n",
       "      <td>鄂农肥（2009）准字0005号</td>\n",
       "      <td>2014-08-15</td>\n",
       "      <td>2019-08</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.40</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>嘉施利（应城）化肥有限公司</td>\n",
       "      <td>复混肥料</td>\n",
       "      <td>粒状</td>\n",
       "      <td>0.26</td>\n",
       "      <td>0.08</td>\n",
       "      <td>0.10</td>\n",
       "      <td>中氯</td>\n",
       "      <td>0.0</td>\n",
       "      <td>鄂农肥（2009）准字0006号</td>\n",
       "      <td>2014-08-15</td>\n",
       "      <td>2019-08</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.44</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>湖北澳特尔化工有限公司</td>\n",
       "      <td>复混肥料</td>\n",
       "      <td>粒状</td>\n",
       "      <td>0.15</td>\n",
       "      <td>0.15</td>\n",
       "      <td>0.15</td>\n",
       "      <td>无氯</td>\n",
       "      <td>0.0</td>\n",
       "      <td>鄂农肥（2009）准字00079号</td>\n",
       "      <td>2014-10-25</td>\n",
       "      <td>2019-10</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.45</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>嘉施利（应城）化肥有限公司</td>\n",
       "      <td>复混肥料</td>\n",
       "      <td>粒状</td>\n",
       "      <td>0.20</td>\n",
       "      <td>0.05</td>\n",
       "      <td>0.11</td>\n",
       "      <td>无氯</td>\n",
       "      <td>0.0</td>\n",
       "      <td>鄂农肥（2009）准字0007号</td>\n",
       "      <td>2014-08-15</td>\n",
       "      <td>2019-08</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.36</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   序号           企业名称 产品通用名称 产品形态  总氮百分比  P2O5百分比  K2O百分比 含氯情况  有机质百分比  \\\n",
       "1   2  嘉施利（应城）化肥有限公司   复混肥料   粒状   0.17     0.17    0.07   低氯     0.0   \n",
       "2   3  嘉施利（应城）化肥有限公司   复混肥料   粒状   0.20     0.05    0.15   无氯     0.0   \n",
       "3   4  嘉施利（应城）化肥有限公司   复混肥料   粒状   0.26     0.08    0.10   中氯     0.0   \n",
       "4   5    湖北澳特尔化工有限公司   复混肥料   粒状   0.15     0.15    0.15   无氯     0.0   \n",
       "5   6  嘉施利（应城）化肥有限公司   复混肥料   粒状   0.20     0.05    0.11   无氯     0.0   \n",
       "\n",
       "              正式登记证号        发证日期      有效期 产品商品名称  适用作物  总无机养分百分比  标签  \n",
       "1   鄂农肥（2009）准字0004号  2014-08-15  2019-08    NaN   NaN      0.41   5  \n",
       "2   鄂农肥（2009）准字0005号  2014-08-15  2019-08    NaN   NaN      0.40   5  \n",
       "3   鄂农肥（2009）准字0006号  2014-08-15  2019-08    NaN   NaN      0.44   5  \n",
       "4  鄂农肥（2009）准字00079号  2014-10-25  2019-10    NaN   NaN      0.45   6  \n",
       "5   鄂农肥（2009）准字0007号  2014-08-15  2019-08    NaN   NaN      0.36   4  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows of the loaded table.\n",
    "data.head(n=5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 任务 3.2 从文件“result2_2.xlsx”中提取 2021 年 9 月 30 日仍有效的有机肥料产品，将完整的结果保存到文件“result3_2.xlsx”中。从有效产品中分别筛选出广西和湖北（根据正式登记证号区分）产品登记数量在前 5 的组别，分析两个省份上述组别的分布差异。请在报告中给出处理过程及分析结果"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>序号</th>\n",
       "      <th>企业名称</th>\n",
       "      <th>产品通用名称</th>\n",
       "      <th>产品形态</th>\n",
       "      <th>总氮百分比</th>\n",
       "      <th>P2O5百分比</th>\n",
       "      <th>K2O百分比</th>\n",
       "      <th>含氯情况</th>\n",
       "      <th>有机质百分比</th>\n",
       "      <th>正式登记证号</th>\n",
       "      <th>发证日期</th>\n",
       "      <th>有效期</th>\n",
       "      <th>产品商品名称</th>\n",
       "      <th>适用作物</th>\n",
       "      <th>总无机养分百分比</th>\n",
       "      <th>标签_1</th>\n",
       "      <th>标签_2</th>\n",
       "      <th>标签</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>230</th>\n",
       "      <td>231</td>\n",
       "      <td>湖北中化东方肥料有限公司</td>\n",
       "      <td>有机肥料</td>\n",
       "      <td>粉状</td>\n",
       "      <td>0.0267</td>\n",
       "      <td>0.0267</td>\n",
       "      <td>0.0267</td>\n",
       "      <td>无氯</td>\n",
       "      <td>0.60</td>\n",
       "      <td>鄂农肥（2009）准字0348号</td>\n",
       "      <td>2015-01-20</td>\n",
       "      <td>2020-01</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0801</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1</td>\n",
       "      <td>7,1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>319</th>\n",
       "      <td>320</td>\n",
       "      <td>武汉市沃农肥业有限公司</td>\n",
       "      <td>有机肥料</td>\n",
       "      <td>粉状</td>\n",
       "      <td>0.0167</td>\n",
       "      <td>0.0167</td>\n",
       "      <td>0.0167</td>\n",
       "      <td>无氯</td>\n",
       "      <td>0.45</td>\n",
       "      <td>鄂农肥（2010）准字0595号</td>\n",
       "      <td>2015-01-20</td>\n",
       "      <td>2020-01</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0501</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1</td>\n",
       "      <td>6,1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>424</th>\n",
       "      <td>425</td>\n",
       "      <td>湖北太阳雨三农科技有限公司</td>\n",
       "      <td>有机肥料</td>\n",
       "      <td>粉状</td>\n",
       "      <td>0.0167</td>\n",
       "      <td>0.0167</td>\n",
       "      <td>0.0167</td>\n",
       "      <td>无氯</td>\n",
       "      <td>0.45</td>\n",
       "      <td>鄂农肥（2010）准字0915号</td>\n",
       "      <td>2015/11/10</td>\n",
       "      <td>2020-11</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0501</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1</td>\n",
       "      <td>6,1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>473</th>\n",
       "      <td>474</td>\n",
       "      <td>武汉裕龙生物科技有限责任公司</td>\n",
       "      <td>有机肥料</td>\n",
       "      <td>粒状</td>\n",
       "      <td>0.0167</td>\n",
       "      <td>0.0167</td>\n",
       "      <td>0.0167</td>\n",
       "      <td>无氯</td>\n",
       "      <td>0.45</td>\n",
       "      <td>鄂农肥（2010）准字1116号</td>\n",
       "      <td>2015/11/20</td>\n",
       "      <td>2020-11</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0501</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1</td>\n",
       "      <td>6,1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>538</th>\n",
       "      <td>539</td>\n",
       "      <td>湖北地利奥生物科技有限公司</td>\n",
       "      <td>有机肥料</td>\n",
       "      <td>粉状</td>\n",
       "      <td>0.0167</td>\n",
       "      <td>0.0167</td>\n",
       "      <td>0.0167</td>\n",
       "      <td>无氯</td>\n",
       "      <td>0.45</td>\n",
       "      <td>鄂农肥（2011）准字0038号</td>\n",
       "      <td>2016/03/22</td>\n",
       "      <td>2021-03</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0501</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1</td>\n",
       "      <td>6,1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      序号            企业名称 产品通用名称 产品形态   总氮百分比  P2O5百分比  K2O百分比 含氯情况  有机质百分比  \\\n",
       "230  231    湖北中化东方肥料有限公司   有机肥料   粉状  0.0267   0.0267  0.0267   无氯    0.60   \n",
       "319  320     武汉市沃农肥业有限公司   有机肥料   粉状  0.0167   0.0167  0.0167   无氯    0.45   \n",
       "424  425   湖北太阳雨三农科技有限公司   有机肥料   粉状  0.0167   0.0167  0.0167   无氯    0.45   \n",
       "473  474  武汉裕龙生物科技有限责任公司   有机肥料   粒状  0.0167   0.0167  0.0167   无氯    0.45   \n",
       "538  539   湖北地利奥生物科技有限公司   有机肥料   粉状  0.0167   0.0167  0.0167   无氯    0.45   \n",
       "\n",
       "               正式登记证号        发证日期      有效期 产品商品名称  适用作物  总无机养分百分比  标签_1  标签_2  \\\n",
       "230  鄂农肥（2009）准字0348号  2015-01-20  2020-01    NaN   NaN    0.0801   7.0     1   \n",
       "319  鄂农肥（2010）准字0595号  2015-01-20  2020-01    NaN   NaN    0.0501   6.0     1   \n",
       "424  鄂农肥（2010）准字0915号  2015/11/10  2020-11    NaN   NaN    0.0501   6.0     1   \n",
       "473  鄂农肥（2010）准字1116号  2015/11/20  2020-11    NaN   NaN    0.0501   6.0     1   \n",
       "538  鄂农肥（2011）准字0038号  2016/03/22  2021-03    NaN   NaN    0.0501   6.0     1   \n",
       "\n",
       "      标签  \n",
       "230  7,1  \n",
       "319  6,1  \n",
       "424  6,1  \n",
       "473  6,1  \n",
       "538  6,1  "
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# result2_2.xlsx is already the organic-fertilizer sheet, so no filtering on\n",
    "# 产品通用名称 is needed here.\n",
    "data_1 = pd.read_excel(io='data/result2_2.xlsx')\n",
    "data_1.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Select the products whose validity period ends after 2021-09-30.\n",
    "# The parsed dates are kept in a separate series instead of overwriting\n",
    "# data_1['有效期'], so the column keeps its original values when the filtered rows\n",
    "# are saved and re-running this cell always starts from the same input.\n",
    "# Values that cannot be parsed become NaT and are excluded by the comparison.\n",
    "# NOTE(review): month-only values such as '2021-09' parse to the 1st of that month,\n",
    "# so they count as expired on the cutoff date -- confirm this is the intended rule.\n",
    "validity_cutoff = pd.Timestamp('2021-09-30')\n",
    "validity_end = pd.to_datetime(data_1['有效期'], errors='coerce')\n",
    "# Compare real timestamps rather than formatted date strings.\n",
    "data_2 = data_1[validity_end > validity_cutoff]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split the valid products by province using the first character of the\n",
    "# registration number: 鄂 selects the hubei rows, 桂 the guangxi rows.\n",
    "# The vectorised .str accessor with na=False excludes rows whose number is missing\n",
    "# or empty; indexing x[0] inside apply() would raise on such rows.\n",
    "cert_no = data_2['正式登记证号']\n",
    "hubei = data_2[cert_no.str.startswith('鄂', na=False)]\n",
    "guangxi = data_2[cert_no.str.startswith('桂', na=False)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>序号</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>标签</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>6,1</th>\n",
       "      <td>388</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7,1</th>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8,1</th>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1,1</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9,1</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      序号\n",
       "标签      \n",
       "6,1  388\n",
       "7,1   13\n",
       "8,1    6\n",
       "1,1    1\n",
       "9,1    1"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Five most frequent 标签 groups among the Hubei rows, ranked by number of records.\n",
    "(\n",
    "    hubei.groupby('标签')[['序号']]\n",
    "    .count()\n",
    "    .sort_values(by='序号', ascending=False)\n",
    "    .head(5)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>序号</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>标签</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>6,1</th>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7,1</th>\n",
       "      <td>79</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8,1</th>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9,1</th>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1,1</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      序号\n",
       "标签      \n",
       "6,1  400\n",
       "7,1   79\n",
       "8,1   19\n",
       "9,1    6\n",
       "1,1    2"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Five most frequent 标签 groups among the Guangxi rows, ranked by number of records.\n",
    "(\n",
    "    guangxi.groupby('标签')[['序号']]\n",
    "    .count()\n",
    "    .sort_values(by='序号', ascending=False)\n",
    "    .head(5)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the still-valid products for task 3.2. index=False keeps the pandas row\n",
    "# index out of the workbook, so the sheet has the same columns as the\n",
    "# result2_2.xlsx input instead of gaining an extra unnamed first column.\n",
    "data_2.to_excel('data/result3_2.xlsx', index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 任务 3.3 从附件 3 中提取产品登记数量大于 10 的肥料企业，给出这些企业所用到的原料集合（发酵菌剂除外）。以各企业用到的原料作为特征，计算企业之间的杰卡德相似系数矩阵，并将结果（保留4 位小数）保存到文件“result3_3.xlsx” 中（不提供模板文件，格式见表 1）。请在报告中给出处理思路、过程及相似系数矩阵"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 153,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>序号</th>\n",
       "      <th>蘑菇渣</th>\n",
       "      <th>石灰渣</th>\n",
       "      <th>酒精发酵浓缩液</th>\n",
       "      <th>磷酸一铵</th>\n",
       "      <th>尿素</th>\n",
       "      <th>氧化钾</th>\n",
       "      <th>草木灰</th>\n",
       "      <th>氯化钾</th>\n",
       "      <th>甘蔗滤泥发酵熟料</th>\n",
       "      <th>...</th>\n",
       "      <th>牛骨粉</th>\n",
       "      <th>谷壳</th>\n",
       "      <th>生物菌剂</th>\n",
       "      <th>奶盖</th>\n",
       "      <th>酒糟粉</th>\n",
       "      <th>大颗粒磷酸二铵</th>\n",
       "      <th>大颗粒硫酸钾</th>\n",
       "      <th>大颗粒硫酸铵</th>\n",
       "      <th>种</th>\n",
       "      <th>55磷酸一铵</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>企业名称</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>ID1</th>\n",
       "      <td>40</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>29</td>\n",
       "      <td>28</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>22</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID10</th>\n",
       "      <td>11</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>11</td>\n",
       "      <td>11</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>11</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID100</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID101</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID102</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 295 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       序号  蘑菇渣  石灰渣  酒精发酵浓缩液  磷酸一铵  尿素  氧化钾  草木灰  氯化钾  甘蔗滤泥发酵熟料   ...    牛骨粉  \\\n",
       "企业名称                                                              ...          \n",
       "ID1    40    0    0        0    29  28    0    0   22         0   ...      0   \n",
       "ID10   11    0    0        0    11  11    0    0   11         0   ...      0   \n",
       "ID100   1    0    0        0     0   0    0    1    0         0   ...      0   \n",
       "ID101   1    0    0        0     0   0    0    1    0         0   ...      0   \n",
       "ID102   1    0    0        0     0   0    0    1    0         0   ...      0   \n",
       "\n",
       "       谷壳  生物菌剂  奶盖  酒糟粉  大颗粒磷酸二铵  大颗粒硫酸钾  大颗粒硫酸铵  种  55磷酸一铵  \n",
       "企业名称                                                          \n",
       "ID1     0     0   0    0        0       0       0  0       0  \n",
       "ID10    0     0   0    0        0       0       0  0       0  \n",
       "ID100   0     0   0    0        0       0       0  0       0  \n",
       "ID101   0     0   0    0        0       0       0  0       0  \n",
       "ID102   0     0   0    0        0       0       0  0       0  \n",
       "\n",
       "[5 rows x 295 columns]"
      ]
     },
     "execution_count": 153,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load 附件3.xlsx and, for every enterprise, count the non-null entries in each\n",
    "# of the remaining columns.\n",
    "data_3 = pd.read_excel(io='data/附件3.xlsx')\n",
    "\n",
    "data_4 = data_3.groupby('企业名称').count()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 173,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>序号</th>\n",
       "      <th>蘑菇渣</th>\n",
       "      <th>石灰渣</th>\n",
       "      <th>酒精发酵浓缩液</th>\n",
       "      <th>磷酸一铵</th>\n",
       "      <th>尿素</th>\n",
       "      <th>氧化钾</th>\n",
       "      <th>草木灰</th>\n",
       "      <th>氯化钾</th>\n",
       "      <th>甘蔗滤泥发酵熟料</th>\n",
       "      <th>...</th>\n",
       "      <th>牛骨粉</th>\n",
       "      <th>谷壳</th>\n",
       "      <th>生物菌剂</th>\n",
       "      <th>奶盖</th>\n",
       "      <th>酒糟粉</th>\n",
       "      <th>大颗粒磷酸二铵</th>\n",
       "      <th>大颗粒硫酸钾</th>\n",
       "      <th>大颗粒硫酸铵</th>\n",
       "      <th>种</th>\n",
       "      <th>55磷酸一铵</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>企业名称</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>ID1</th>\n",
       "      <td>40</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>29</td>\n",
       "      <td>28</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>22</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID10</th>\n",
       "      <td>11</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>11</td>\n",
       "      <td>11</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>11</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID12</th>\n",
       "      <td>11</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID2</th>\n",
       "      <td>26</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>23</td>\n",
       "      <td>24</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>18</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID3</th>\n",
       "      <td>19</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>19</td>\n",
       "      <td>19</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>15</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 295 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      序号  蘑菇渣  石灰渣  酒精发酵浓缩液  磷酸一铵  尿素  氧化钾  草木灰  氯化钾  甘蔗滤泥发酵熟料   ...    牛骨粉  \\\n",
       "企业名称                                                             ...          \n",
       "ID1   40    0    0        0    29  28    0    0   22         0   ...      0   \n",
       "ID10  11    0    0        0    11  11    0    0   11         0   ...      0   \n",
       "ID12  11    0    0        0     2   4    0    0    7         0   ...      0   \n",
       "ID2   26    0    0        0    23  24    0    0   18         0   ...      0   \n",
       "ID3   19    0    0        0    19  19    0    0   15         0   ...      0   \n",
       "\n",
       "      谷壳  生物菌剂  奶盖  酒糟粉  大颗粒磷酸二铵  大颗粒硫酸钾  大颗粒硫酸铵  种  55磷酸一铵  \n",
       "企业名称                                                         \n",
       "ID1    0     0   0    0        0       0       0  0       0  \n",
       "ID10   0     0   0    0        0       0       0  0       0  \n",
       "ID12   0     0   0    0        0       0       0  0       0  \n",
       "ID2    0     0   0    0        0       0       0  0       0  \n",
       "ID3    0     0   0    0        0       0       0  0       0  \n",
       "\n",
       "[5 rows x 295 columns]"
      ]
     },
     "execution_count": 173,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep only the rows of data_4 whose '序号' value is greater than 10\n",
    "# (presumably a per-company product count -- TODO confirm against the cell that builds data_4).\n",
    "data_5 = data_4.loc[data_4['序号'].gt(10)]\n",
    "data_5.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 杰卡德相似系数矩阵"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 171,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID1</th>\n",
       "      <th>ID10</th>\n",
       "      <th>ID12</th>\n",
       "      <th>ID2</th>\n",
       "      <th>ID3</th>\n",
       "      <th>ID4</th>\n",
       "      <th>ID5</th>\n",
       "      <th>ID6</th>\n",
       "      <th>ID7</th>\n",
       "      <th>ID9</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>ID1</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID10</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID12</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID2</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID3</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID4</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID5</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID6</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID7</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID9</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      ID1  ID10  ID12  ID2  ID3  ID4  ID5  ID6  ID7  ID9\n",
       "ID1     0     0     0    0    0    0    0    0    0    0\n",
       "ID10    0     0     0    0    0    0    0    0    0    0\n",
       "ID12    0     0     0    0    0    0    0    0    0    0\n",
       "ID2     0     0     0    0    0    0    0    0    0    0\n",
       "ID3     0     0     0    0    0    0    0    0    0    0\n",
       "ID4     0     0     0    0    0    0    0    0    0    0\n",
       "ID5     0     0     0    0    0    0    0    0    0    0\n",
       "ID6     0     0     0    0    0    0    0    0    0    0\n",
       "ID7     0     0     0    0    0    0    0    0    0    0\n",
       "ID9     0     0     0    0    0    0    0    0    0    0"
      ]
     },
     "execution_count": 171,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Company identifiers kept in data_5; they label both axes of the similarity matrix.\n",
    "company_list = list(data_5.index)\n",
    "# Square company-by-company matrix, initialised to 0. Both axes are taken from\n",
    "# company_list (instead of a hard-coded ID list) so the labels always match\n",
    "# whichever companies data_5 currently holds.\n",
    "df = pd.DataFrame(0, index=company_list, columns=company_list)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 175,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID1</th>\n",
       "      <th>ID10</th>\n",
       "      <th>ID12</th>\n",
       "      <th>ID2</th>\n",
       "      <th>ID3</th>\n",
       "      <th>ID4</th>\n",
       "      <th>ID5</th>\n",
       "      <th>ID6</th>\n",
       "      <th>ID7</th>\n",
       "      <th>ID9</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>ID1</th>\n",
       "      <td>1</td>\n",
       "      <td>0.2000</td>\n",
       "      <td>0.1250</td>\n",
       "      <td>0.1667</td>\n",
       "      <td>0.1250</td>\n",
       "      <td>0.2222</td>\n",
       "      <td>0.1111</td>\n",
       "      <td>0.2857</td>\n",
       "      <td>0.0588</td>\n",
       "      <td>0.1250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID10</th>\n",
       "      <td>0.2000</td>\n",
       "      <td>1</td>\n",
       "      <td>0.4000</td>\n",
       "      <td>0.2143</td>\n",
       "      <td>0.2727</td>\n",
       "      <td>0.2857</td>\n",
       "      <td>0.3333</td>\n",
       "      <td>0.5556</td>\n",
       "      <td>0.4000</td>\n",
       "      <td>0.5556</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID12</th>\n",
       "      <td>0.1250</td>\n",
       "      <td>0.4000</td>\n",
       "      <td>1</td>\n",
       "      <td>0.3077</td>\n",
       "      <td>0.4000</td>\n",
       "      <td>0.2857</td>\n",
       "      <td>0.6000</td>\n",
       "      <td>0.4000</td>\n",
       "      <td>0.4000</td>\n",
       "      <td>0.2727</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID2</th>\n",
       "      <td>0.1667</td>\n",
       "      <td>0.2143</td>\n",
       "      <td>0.3077</td>\n",
       "      <td>1</td>\n",
       "      <td>0.3077</td>\n",
       "      <td>0.3125</td>\n",
       "      <td>0.3571</td>\n",
       "      <td>0.2143</td>\n",
       "      <td>0.2143</td>\n",
       "      <td>0.0625</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID3</th>\n",
       "      <td>0.1250</td>\n",
       "      <td>0.2727</td>\n",
       "      <td>0.4000</td>\n",
       "      <td>0.3077</td>\n",
       "      <td>1</td>\n",
       "      <td>0.5000</td>\n",
       "      <td>0.3333</td>\n",
       "      <td>0.2727</td>\n",
       "      <td>0.4000</td>\n",
       "      <td>0.1667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID4</th>\n",
       "      <td>0.2222</td>\n",
       "      <td>0.2857</td>\n",
       "      <td>0.2857</td>\n",
       "      <td>0.3125</td>\n",
       "      <td>0.5000</td>\n",
       "      <td>1</td>\n",
       "      <td>0.4286</td>\n",
       "      <td>0.2857</td>\n",
       "      <td>0.2857</td>\n",
       "      <td>0.2857</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID5</th>\n",
       "      <td>0.1111</td>\n",
       "      <td>0.3333</td>\n",
       "      <td>0.6000</td>\n",
       "      <td>0.3571</td>\n",
       "      <td>0.3333</td>\n",
       "      <td>0.4286</td>\n",
       "      <td>1</td>\n",
       "      <td>0.3333</td>\n",
       "      <td>0.4545</td>\n",
       "      <td>0.3333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID6</th>\n",
       "      <td>0.2857</td>\n",
       "      <td>0.5556</td>\n",
       "      <td>0.4000</td>\n",
       "      <td>0.2143</td>\n",
       "      <td>0.2727</td>\n",
       "      <td>0.2857</td>\n",
       "      <td>0.3333</td>\n",
       "      <td>1</td>\n",
       "      <td>0.2727</td>\n",
       "      <td>0.2727</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID7</th>\n",
       "      <td>0.0588</td>\n",
       "      <td>0.4000</td>\n",
       "      <td>0.4000</td>\n",
       "      <td>0.2143</td>\n",
       "      <td>0.4000</td>\n",
       "      <td>0.2857</td>\n",
       "      <td>0.4545</td>\n",
       "      <td>0.2727</td>\n",
       "      <td>1</td>\n",
       "      <td>0.2727</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID9</th>\n",
       "      <td>0.1250</td>\n",
       "      <td>0.5556</td>\n",
       "      <td>0.2727</td>\n",
       "      <td>0.0625</td>\n",
       "      <td>0.1667</td>\n",
       "      <td>0.2857</td>\n",
       "      <td>0.3333</td>\n",
       "      <td>0.2727</td>\n",
       "      <td>0.2727</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         ID1    ID10    ID12     ID2     ID3     ID4     ID5     ID6     ID7  \\\n",
       "ID1        1  0.2000  0.1250  0.1667  0.1250  0.2222  0.1111  0.2857  0.0588   \n",
       "ID10  0.2000       1  0.4000  0.2143  0.2727  0.2857  0.3333  0.5556  0.4000   \n",
       "ID12  0.1250  0.4000       1  0.3077  0.4000  0.2857  0.6000  0.4000  0.4000   \n",
       "ID2   0.1667  0.2143  0.3077       1  0.3077  0.3125  0.3571  0.2143  0.2143   \n",
       "ID3   0.1250  0.2727  0.4000  0.3077       1  0.5000  0.3333  0.2727  0.4000   \n",
       "ID4   0.2222  0.2857  0.2857  0.3125  0.5000       1  0.4286  0.2857  0.2857   \n",
       "ID5   0.1111  0.3333  0.6000  0.3571  0.3333  0.4286       1  0.3333  0.4545   \n",
       "ID6   0.2857  0.5556  0.4000  0.2143  0.2727  0.2857  0.3333       1  0.2727   \n",
       "ID7   0.0588  0.4000  0.4000  0.2143  0.4000  0.2857  0.4545  0.2727       1   \n",
       "ID9   0.1250  0.5556  0.2727  0.0625  0.1667  0.2857  0.3333  0.2727  0.2727   \n",
       "\n",
       "         ID9  \n",
       "ID1   0.1250  \n",
       "ID10  0.5556  \n",
       "ID12  0.2727  \n",
       "ID2   0.0625  \n",
       "ID3   0.1667  \n",
       "ID4   0.2857  \n",
       "ID5   0.3333  \n",
       "ID6   0.2727  \n",
       "ID7   0.2727  \n",
       "ID9        1  "
      ]
     },
     "execution_count": 175,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fill df with the pairwise Jaccard similarity of the companies' raw-material usage:\n",
    "#     J(x, y) = |materials(x) & materials(y)| / |materials(x) | materials(y)|\n",
    "# A company's material set is the set of COLUMN NAMES whose value in its data_5 row\n",
    "# is greater than 0. (Comparing the sets of cell values, as before, only measured\n",
    "# which count numbers two rows happened to share.)\n",
    "\n",
    "# '序号' is dropped because it is the column used to filter data_5, not a raw material\n",
    "# (presumably a product count -- TODO confirm against the cell that builds data_4).\n",
    "material_usage = data_5.drop(columns=['序号'], errors='ignore')\n",
    "\n",
    "# Build each company's material set once instead of once per pair.\n",
    "# NOTE(review): assumes the data_5 index is unique, so .loc[company] yields one row.\n",
    "used_materials = {}\n",
    "for company in company_list:\n",
    "    usage_row = material_usage.loc[company]\n",
    "    used_materials[company] = set(usage_row[usage_row > 0].index)\n",
    "\n",
    "# Store real floats rather than formatted strings so the matrix stays numeric.\n",
    "df = df.astype(float)\n",
    "for x in company_list:\n",
    "    for y in company_list:\n",
    "        if x == y:\n",
    "            # A company is identical to itself.\n",
    "            df.loc[x, y] = 1.0\n",
    "            continue\n",
    "        shared = used_materials[x] & used_materials[y]\n",
    "        combined = used_materials[x] | used_materials[y]\n",
    "        # Two companies that both use no materials have an empty union;\n",
    "        # report 0.0 instead of dividing by zero.\n",
    "        df.loc[x, y] = round(len(shared) / len(combined), 4) if combined else 0.0\n",
    "\n",
    "df.to_excel('data/result3_3.xlsx')\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
